import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.dates as dates
import numpy as np
import pylab
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqplot_2samples
import math
import copy
import warnings
import os
import csv
# Silence every warning category for the rest of the script.
warnings.filterwarnings("ignore")

## Load data
# Two CSV inputs are read from the working directory: one national table and
# one per-state table. The row counts are kept for the index loops below.
dataUS = pd.read_csv("us.csv")
dataStates = pd.read_csv("us-states.csv")
dataSize = dataUS.shape[0]
dataStateSize = dataStates.shape[0]
## US data
# Turn the national "deaths" column, which is treated here as a running total,
# into per-day new deaths. `usDeath` is a list of [date, new_deaths] rows; the
# first row keeps its reported count unchanged because there is no earlier row
# to subtract.
usDates = dataUS["date"].tolist()
usTotals = dataUS["deaths"].tolist()
usDeath = [
    [day, total - previous]
    for day, total, previous in zip(usDates, usTotals, [0] + usTotals[:-1])
]

# open(..., 'w') does not create missing parent directories, so make sure the
# output folder exists before writing (otherwise FileNotFoundError).
os.makedirs("clean-data", exist_ok=True)
with open("clean-data/US.csv", 'w', newline='') as f:
    csv.writer(f).writerows(usDeath)
# State data
# statesDate: state name -> date of that state's first row in the file.
# statesData: state name -> list of [date, new_deaths] rows.
statesDate = {}
statesData = {}
# Last running total seen for each state. The daily figure must be computed
# against this value; subtracting the previously stored *daily* figure (as the
# rows in statesData hold) gives wrong numbers from a state's third row on.
# NOTE(review): assumes each state's rows appear in chronological order.
lastTotal = {}
stateRows = zip(
    dataStates["state"].tolist(),
    dataStates["date"].tolist(),
    dataStates["deaths"].tolist(),
)
for state, day, total in stateRows:
    if state not in statesData:
        # First row for this state: remember its date and start its series.
        statesDate[state] = day
        statesData[state] = []
    statesData[state].append([day, total - lastTotal.get(state, 0)])
    lastTotal[state] = total

# open(..., 'w') does not create missing parent directories, so make sure the
# output folder exists before writing (otherwise FileNotFoundError).
os.makedirs("clean-data", exist_ok=True)
for key, rows in statesData.items():
    path = "clean-data/" + key + ".csv"
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(rows)
    # Report only after the file has actually been written.
    print(path, " has been successfully created")

## Get latest date of COVID
#startDate = statesDate[max(statesDate,key=statesDate.get)]
# Recorded output of the export loop above (apparently pasted from a notebook
# run); kept as a comment so the script remains valid Python. Files listed:
#   clean-data/Washington.csv, clean-data/Illinois.csv, clean-data/California.csv,
#   clean-data/Arizona.csv, clean-data/Massachusetts.csv, clean-data/Wisconsin.csv,
#   clean-data/Texas.csv, clean-data/Nebraska.csv, clean-data/Utah.csv,
#   clean-data/Oregon.csv, clean-data/Florida.csv, clean-data/New York.csv,
#   clean-data/Rhode Island.csv, clean-data/Georgia.csv, clean-data/New Hampshire.csv,
#   clean-data/North Carolina.csv, clean-data/New Jersey.csv, clean-data/Colorado.csv,
#   clean-data/Maryland.csv, clean-data/Nevada.csv, clean-data/Tennessee.csv,
#   clean-data/Hawaii.csv, clean-data/Indiana.csv, clean-data/Kentucky.csv,
#   clean-data/Minnesota.csv, clean-data/Oklahoma.csv, clean-data/Pennsylvania.csv,
#   clean-data/South Carolina.csv, clean-data/District of Columbia.csv,
#   clean-data/Kansas.csv, clean-data/Missouri.csv, clean-data/Vermont.csv,
#   clean-data/Virginia.csv, clean-data/Connecticut.csv, clean-data/Iowa.csv,
#   clean-data/Louisiana.csv, clean-data/Ohio.csv, clean-data/Michigan.csv,
#   clean-data/South Dakota.csv, clean-data/Arkansas.csv, clean-data/Delaware.csv,
#   clean-data/Mississippi.csv, clean-data/New Mexico.csv, clean-data/North Dakota.csv,
#   clean-data/Wyoming.csv, clean-data/Alaska.csv, clean-data/Maine.csv,
#   clean-data/Alabama.csv, clean-data/Idaho.csv, clean-data/Montana.csv,
#   clean-data/Puerto Rico.csv, clean-data/Virgin Islands.csv, clean-data/Guam.csv,
#   clean-data/West Virginia.csv, clean-data/Northern Mariana Islands.csv
# Plot the national daily-new-deaths series.
# Transposing the [date, count] rows gives one row of dates and one row of
# counts. The counts are converted with the builtin `int`: the `np.int` alias
# was deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
usDeathT = np.array(usDeath).transpose()
numDeath = usDeathT[1].astype(int)

fig, ax = plt.subplots(figsize=(20, 10))
# Convert the date strings to matplotlib date numbers for the x axis.
converted_dates = dates.datestr2num(usDeathT[0])
ax.plot(converted_dates, numDeath)
ax.xaxis_date()
fig.autofmt_xdate()
ax.set_xlabel("Date")
ax.set_ylabel("Number of new deaths")
ax.set_title("Number of daily new deaths in US")
plt.show()
# Draw one daily-new-deaths figure per state, in the order the states were
# first inserted into statesData.
for key in statesData:
    # Transposing the [date, count] rows gives one row of dates and one row of
    # counts. The builtin `int` is used because the `np.int` alias was
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError).
    stateDeathT = np.array(statesData[key]).transpose()
    numDeath = stateDeathT[1].astype(int)

    fig, ax = plt.subplots(figsize=(20, 10))
    # Convert the date strings to matplotlib date numbers for the x axis.
    converted_dates = dates.datestr2num(stateDeathT[0])
    ax.plot(converted_dates, numDeath)
    ax.xaxis_date()
    fig.autofmt_xdate()
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of new deaths")
    ax.set_title("Number of daily new deaths in " + key)
    plt.show()